Code
Table1.2_total <- Delays1 %>%
summarise(
.by = NULL,
flight_period = "Total",
TotalFlights = n(),
TotalUniqueDates = n_distinct(fl_date),
TotalUniqueOrigins = n_distinct(origin),
TotalUniqueDestinations = n_distinct(dest),
AvgCRSDepTime = mean(crs_dep_time, na.rm = TRUE),
AvgDepTime = mean(dep_time, na.rm = TRUE),
AvgDepDelay = round(mean(dep_delay, na.rm = TRUE), 2),
AvgTaxiOut = round(mean(taxi_out, na.rm = TRUE), 2),
AvgTaxiIn = round(mean(taxi_in, na.rm = TRUE), 2),
AvgCRSArrTime = mean(crs_arr_time, na.rm = TRUE),
AvgArrTime = mean(arr_time, na.rm = TRUE),
AvgArrDelay = round(mean(arr_delay, na.rm = TRUE), 2),
AvgAirTime = round(mean(air_time, na.rm = TRUE), 2),
CancelledFlights = sum(cancelled, na.rm = TRUE),
DivertedFlights = sum(diverted, na.rm = TRUE),
AvgCarrierDelay = round(mean(carrier_delay, na.rm = TRUE), 2),
AvgSecurityDelay = round(mean(security_delay, na.rm = TRUE), 2),
AvgWeatherDelay = round(mean(weather_delay, na.rm = TRUE), 2),
AvgNASDelay = round(mean(nas_delay, na.rm = TRUE), 2),
AvgLateAircraftDelay = round(mean(lateaircraft_delay, na.rm = TRUE), 2),
CarrierDelay_ct = sum(carrier_delay > 0),
SecurityDelay_ct = sum(security_delay > 0),
WeatherDelay_ct = sum(weather_delay > 0),
NASDelay_ct = sum(nas_delay > 0),
LateAircraftDelay_ct = sum(lateaircraft_delay > 0)) %>%
mutate(
TotalFlightsCount = sprintf("%d (100%%)", TotalFlights),
CancelledFlightsCount = sprintf("%d (100%%)", CancelledFlights),
DivertedFlightsCount = sprintf("%d (100%%)", DivertedFlights),
CarrierDelayCount = sprintf("%d (100%%)", CarrierDelay_ct),
SecurityDelayCount = sprintf("%d (100%%)", SecurityDelay_ct),
WeatherDelayCount = sprintf("%d (100%%)", WeatherDelay_ct),
NASDelayCount = sprintf("%d (100%%)", NASDelay_ct),
LateAircraftDelayCount = sprintf("%d (100%%)", LateAircraftDelay_ct)
)
Table1.2_combined <- bind_rows(Table1.2, Table1.2_total)
library(lubridate)
# Converting time HHMM.SS to HH:MM:SS
convert_to_time <- function(time_val) {
rounded_time <- round(time_val, 2)
hours <- floor(rounded_time / 100)
minutes_with_secs <- (rounded_time %% 100)
minutes <- floor(minutes_with_secs)
seconds <- round((minutes_with_secs - minutes) * 60, 0)
time_formatted <- sprintf("%02d:%02d:%02d", hours, minutes, seconds)
return(time_formatted)
}
#Apply time conversion, remove extra rows
Table1.3_combined <- Table1.2_combined %>%
mutate(
AvgCRSDepTime = sapply(AvgCRSDepTime, convert_to_time),
AvgDepTime = sapply(AvgDepTime, convert_to_time),
AvgCRSArrTime = sapply(AvgCRSArrTime, convert_to_time),
AvgArrTime = sapply(AvgArrTime, convert_to_time),
) %>%
mutate(across(-flight_period, as.character)
) %>%
select(
flight_period,
TotalFlightsCount,
CancelledFlightsCount,
DivertedFlightsCount,
AvgCRSDepTime,
AvgDepTime,
AvgDepDelay,
AvgTaxiOut,
AvgTaxiIn,
AvgCRSArrTime,
AvgArrTime,
AvgArrDelay,
AvgAirTime,
CarrierDelayCount,
SecurityDelayCount,
WeatherDelayCount,
NASDelayCount,
LateAircraftDelayCount
)
#Pivot table
Table1.3_pivoted <- Table1.3_combined %>%
pivot_longer(
cols = -flight_period,
names_to = "Statistic",
values_to = "Value") %>%
pivot_wider(
names_from = flight_period,
values_from = Value
)
#gt Table1
Table1.3_pivoted %>%
gt() %>%
tab_header(
title = "Flight Delay Summary by Flight Period"
) %>%
cols_label(
Statistic = "Flight Period",
Morning = "Morning",
Afternoon = "Afternoon",
Evening = "Evening",
Total = "Total"
) %>%
tab_spanner(
label = "Flight Period",
columns = c(Morning, Afternoon, Evening, Total)
) %>%
tab_style(
style = list(
cell_text(color = "white"),
cell_fill(color = "rgba(0, 43, 54, 1)")
),
locations = cells_body(
columns = everything()
)
) %>%
tab_style(
style = list(
cell_text(color = "white"),
cell_fill(color = "rgba(0, 43, 54, 1)")
),
locations = cells_column_labels(
columns = everything()
)
) %>%
tab_style(
style = list(
cell_text(color = "white", weight = "bold"),
cell_fill(color = "rgba(0, 43, 54, 1)")
),
locations = cells_title(
groups = c("title", "subtitle")
)
) %>%
tab_style(
style = list(
cell_text(color = "white", weight = "bold"),
cell_fill(color = "rgba(0, 43, 54, 1)")
),
locations = cells_column_spanners(
spanners = everything()
)
) %>%
tab_source_note(
source_note = "Table 1: Summary includes morning, afternoon, and evening flight periods."
) %>%
tab_style(
style = list(
cell_text(color = "white"),
cell_fill(color = "rgba(0, 43, 54, 1)")
),
locations = cells_source_notes()
)| Flight Delay Summary by Flight Period | ||||
|---|---|---|---|---|
| Flight Period |
Flight Period
|
|||
| Morning | Afternoon | Evening | Total | |
| TotalFlightsCount | 1246031 (41.5%) | 1423140 (47.4%) | 330829 (11.0%) | 3000000 (100%) |
| CancelledFlightsCount | 30690 (38.8%) | 38343 (48.4%) | 10107 (12.8%) | 79140 (100%) |
| DivertedFlightsCount | 2555 (36.2%) | 3901 (55.3%) | 600 (8.5%) | 7056 (100%) |
| AvgCRSDepTime | 08:49:31 | 15:73:19 | 20:66:23 | 13:27:04 |
| AvgDepTime | 08:53:58 | 15:89:05 | 20:12:40 | 13:29:47 |
| AvgDepDelay | 5.23 | 12.93 | 16.51 | 10.12 |
| AvgTaxiOut | 16.87 | 16.44 | 16.65 | 16.64 |
| AvgTaxiIn | 7.75 | 7.78 | 6.95 | 7.68 |
| AvgCRSArrTime | 10:87:15 | 17:85:11 | 17:42:14 | 14:90:34 |
| AvgArrTime | 10:86:01 | 17:71:56 | 15:89:47 | 14:66:31 |
| AvgArrDelay | -0.77 | 7.34 | 10.04 | 4.26 |
| AvgAirTime | 114.12 | 109.8 | 116.31 | 112.31 |
| CarrierDelayCount | 86824 (29.2%) | 162266 (54.6%) | 47861 (16.1%) | 296951 (100%) |
| SecurityDelayCount | 887 (32.1%) | 1434 (52.0%) | 438 (15.9%) | 2759 (100%) |
| WeatherDelayCount | 8380 (26.7%) | 18758 (59.7%) | 4290 (13.7%) | 31428 (100%) |
| NASDelayCount | 80604 (31.4%) | 144366 (56.3%) | 31507 (12.3%) | 256477 (100%) |
| LateAircraftDelayCount | 42721 (16.5%) | 168902 (65.2%) | 47391 (18.3%) | 259014 (100%) |
| Table 1: Summary includes morning, afternoon, and evening flight periods. | ||||